## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.2.1 ✔ dplyr 1.1.2
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
##
## Attaching package: 'lubridate'
##
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
##
##
## Loading required package: permute
##
## Loading required package: lattice
##
## This is vegan 2.6-4
This script cleans and visualizes Invert data from the TASR monitoring project using data from 2015-2021. The end goal is to re-produce a non-metric multidimensional scaling (MDS) plot similar to the one in Tronstad et al. 2020. To re-create that plot, the first step is to calculate Average density across replicates/fractions for each taxon and year for each site.
Currently, data is in two different formats:
2015-2018 - “cleaned” format has columns: stream, name, type, site, year, date, area, fraction (large or small), Sampler, NoSurber, Rep (1, 2, or 3), Taxa, Biomass, Abundance, and Density.
2019-2021 - “raw” data has columns: park, sampler, stream, date, rep (1, 2, or 3), fraction (large or small), Taxa, L1:L20 (# of L columns varies), MeanLength(mm), IndBiomass(mg_ind), Subsample, Abundance, Density, Biomass(mg/m2), stage, and Notes. (some years also have “W1…Wn” columns)
A few additional notes: - Both Upper and Lower sites exist for 2015 - this script ONLY uses data from the Upper sites. - Adult taxa (noted in the “stage” column) will be removed before calculating densities.
In both cases, cleaning steps will be to:
NOTE - This script will only run if ALL input files have columns “Stream”, “Year”, “Taxa”, “Abundance”, “Density”, and “Stage” (at a minimum - other columns don’t need to be the same). If one of these columns is missing, add a blank column with the appropriate name to the .csv file in Excel. For The 2019-2021 files, I manually added a “Year” column with the corresponding year to each file in Excel. If one of these columns has a different name (e.g. Density(ind/m2) instead of Density), rename the column in the .csv file BEFORE running the script.
First, make a list of all file names:
invert_files <- list.files("invert_data/raw_data/csv", full.names = T)
Initiate empty list for data import:
invert_list <- list()
Read in and clean data files as outlined above using a for loop: NOTE - not going to calculate mean density/total abundance yet b/c of inconsistent stream names between different years.
for(i in 1:length(invert_files)){
invert_list[[i]]<-read.csv(invert_files[[i]])%>% #store first .csv in file name list as first data frame in invert_list
filter(!(Stage == "adult"| Stage == "Adult"))%>% #1. Remove any observations where "Stage" = adult OR Adult
select(Stream, Year, Taxa, Density, Abundance) #2. Select desired columns: stream, year, taxa, and density
}
Combine all density calculations into a “master” dataframe for N-MDS plotting + additional analysis:
invert_rbind <- invert_list %>%
bind_rows()%>%
arrange(Stream, Year)
Checking for inconsistent site names:
unique(invert_rbind$Stream)
## [1] "" "AK Basin Icy Seep"
## [3] "AK Basin South RG" "Alaska Basin North Rock Glacier"
## [5] "Alaska Basin RG" "Alaska Basin South Rock Glacier"
## [7] "Beartooth Cr" "Cloudveil"
## [9] "Cloudveil Dome" "Death Rock Glacier 4"
## [11] "Delta Inlet Stream" "Delta Lake Inlet"
## [13] "Delta Lake Inlet Stream" "Froze to Death"
## [15] "Grizzly" "Grizzly Inlet Stream"
## [17] "Grizzly Stream" "Gusher"
## [19] "Mica Lake " "Middle Teton"
## [21] "NF Teton Ck" "NF Teton Cr"
## [23] "NF Teton Creek" "North Fork Teton Creek"
## [25] "Paintbrush RG" "Paintbrush Rock Glacier"
## [27] "Petersen Glacier" "Quad Creek"
## [29] "SF Teton Creek" "Silver Run"
## [31] "Silver Run Snowmelt" "Skillet Glacier"
## [33] "Skillet Glacier Stream" "South Cascade"
## [35] "South Cascade RG" "South Cascade Rock Glacier 6"
## [37] "South Fork Teton Creek" "The Gusher"
## [39] "Wind Cave"
Lots of inconsistency in site naming, plus some rows with no site name. Next steps: delete rows with no site name; standardize other names
Remove rows w/out stream name:
invert_rbind <- invert_rbind%>%
filter(!Stream == "")
Standardize stream names:
invert_rename <- invert_rbind %>%
mutate(Stream = case_when( # Using case_when + str_detect (stringr package) to rename all cells that contain a given text string to the specified new value - a little clunky, but effective.
str_detect(Stream, "Delta") ~ "delta",
str_detect(Stream, "Cloudveil") ~ "cloudveil",
str_detect(Stream, "Basin South")|str_detect(Stream, "Icy Seep")|str_detect(Stream, "Basin RG") ~ "s_ak_basin",
str_detect(Stream, "Basin North")~ "n_ak_basin",
str_detect(Stream, "Grizzly")~ "grizzly",
str_detect(Stream, "Mica") ~ "mica",
str_detect(Stream, "North Fork Teton")|str_detect(Stream, "NF Teton")~"n_teton",
str_detect(Stream, "Paintbrush")~"paintbrush",
str_detect(Stream, "Quad")~"quad_cr",
str_detect(Stream, "Silver") ~"silver_run",
str_detect(Stream, "Cascade")~"s_cascade",
str_detect(Stream, "South Fork Teton")|str_detect(Stream, "SF Teton")~"s_teton",
str_detect(Stream, "Froze")~"froze_to_death",
str_detect(Stream, "Middle")~"mid_teton",
str_detect(Stream, "Skillet")~"skillet",
str_detect(Stream, "Gusher")~"gusher",
str_detect(Stream, "Beartooth")~"beartooth_cr",
str_detect(Stream, "Death Rock")~"death_canyon",
str_detect(Stream, "Petersen")~"petersen",
str_detect(Stream, "Wind")~"windcave"
))
Check the output:
unique(invert_rename$Stream)
## [1] "s_ak_basin" "n_ak_basin" "beartooth_cr" "cloudveil"
## [5] "death_canyon" "delta" "froze_to_death" "grizzly"
## [9] "gusher" "mica" "mid_teton" "n_teton"
## [13] "paintbrush" "petersen" "quad_cr" "s_teton"
## [17] "silver_run" "skillet" "s_cascade" "windcave"
Also need to check for different spellings/etc of Taxa names:
invert_rename1 <- invert_rename %>%
mutate(taxa_lwr = tolower(Taxa))%>% #make all lowercase first to get rid of some duplicates w/ different cases
mutate(taxa_lwr = str_replace(taxa_lwr, "pupae", ""))%>% #remove "pupae" label from all names
mutate(taxa_lwr = str_replace(taxa_lwr, "pupa", ""))%>% #remove "pupa" label from all names
filter(!str_detect(taxa_lwr, "adult"))%>% #removing any rows with "adult" in any part of the taxa name
arrange(taxa_lwr)
unique(invert_rename1$taxa_lwr) #check unique values
## [1] "1megarcys"
## [2] "2megarcys"
## [3] "acari"
## [4] "agathon"
## [5] "allomyia"
## [6] "alloperla"
## [7] "baetidae"
## [8] "baetis"
## [9] "baetis "
## [10] "baetis "
## [11] "baetis 2"
## [12] "baetis 2 "
## [13] "carabidae"
## [14] "chironomidae"
## [15] "chironomidae "
## [16] "chloroperlidae"
## [17] "chrionomidae "
## [18] "chrysopidae"
## [19] "cinygmula"
## [20] "clinocera"
## [21] "collembola"
## [22] "culicoides"
## [23] "dicranota"
## [24] "diptera"
## [25] "dixella"
## [26] "drunella"
## [27] "drunella "
## [28] "elmidae"
## [29] "empididae"
## [30] "epeorus"
## [31] "gonomyodes"
## [32] "helodon"
## [33] "hemiptera"
## [34] "heteroceridae"
## [35] "heterocloeon"
## [36] "hirudinea"
## [37] "homophylax"
## [38] "homphylax"
## [39] "hydrophilidae"
## [40] "lednia"
## [41] "leuctridae"
## [42] "limnephilidae"
## [43] "limonia"
## [44] "lipogomphus"
## [45] "macroveliidae"
## [46] "megarcys"
## [47] "megarcys2"
## [48] "molophilus"
## [49] "molophilus did not fit well with key"
## [50] "muscidae"
## [51] "nematoda"
## [52] "neothremma"
## [53] "non-tany"
## [54] "non-tanypodinae"
## [55] "oligochaeta"
## [56] "oreogeton"
## [57] "ormosia"
## [58] "ostracoda"
## [59] "paradelphomyia"
## [60] "parapsyche"
## [61] "pentacora"
## [62] "perlodidae"
## [63] "plecoptera"
## [64] "pomoleuctra"
## [65] "probezzia"
## [66] "prosimuliium"
## [67] "prosimulium"
## [68] "psychoglypha"
## [69] "rhithrogena"
## [70] "rhyacophila"
## [71] "rhyacophila "
## [72] "rhyacophila alberta group"
## [73] "rhyacophila arnaudi"
## [74] "rhyacophila atrata complex"
## [75] "rhyacophila betteni group"
## [76] "rhyacophila blarina"
## [77] "rhyacophila brannea/venma group"
## [78] "rhyacophila brunnea group"
## [79] "rhyacophila brunnea group vemna group"
## [80] "rhyacophila brunnea/vemna group"
## [81] "rhyacophila coloradenis"
## [82] "rhyacophila coloradenis group"
## [83] "rhyacophila coloradensis group"
## [84] "rhyacophila ecosa group"
## [85] "rhyacophila hyalinata banks"
## [86] "rhyacophila hyalinata group"
## [87] "rhyacophila rotunda group"
## [88] "rhyacophila sibirica group"
## [89] "rhyacophila vatina complex"
## [90] "rhyacophila ventina complex"
## [91] "rhyacophila verrula"
## [92] "rhyacophila verrula group"
## [93] "rhyacophila verrula milne"
## [94] "rhyacophila vetina group"
## [95] "rhyacophila viquaea group"
## [96] "rhyacophila vobara subgroup"
## [97] "rhyacophila vofixa group"
## [98] "rhyacophila vofixa subgroup vobara subgroup"
## [99] "rithrogena"
## [100] "sarcophagidae"
## [101] "simuliidae"
## [102] "simuliidae "
## [103] "simuliidae (unknown)"
## [104] "simulium"
## [105] "skwala"
## [106] "sphaeridiinae"
## [107] "sphaeriidae"
## [108] "staphylinidae"
## [109] "stenelmis"
## [110] "suwallia"
## [111] "sweltsa"
## [112] "tanypodinae"
## [113] "tipula"
## [114] "tipula (arctotipula)"
## [115] "trichoptera"
## [116] "trichoptera homphylax or allomyia"
## [117] "turbellaria"
## [118] "twinnia"
## [119] "yoraperla"
## [120] "zapada"
Fixing repeats/spelling errors:
invert_rename2 <- invert_rename1%>%
mutate(taxa_lwr = ifelse(str_detect(taxa_lwr, "megarcys"), "megarcys", taxa_lwr))%>%
mutate(taxa_lwr = str_replace(taxa_lwr, "chrionomidae", "chironomidae"))%>%
mutate(taxa_lwr = str_replace(taxa_lwr, "venma", "vemna"))%>%
mutate(taxa_lwr = str_replace(taxa_lwr, "brannea", "brunnea"))%>%
mutate(taxa_lwr = str_replace(taxa_lwr, "homp", "homop"))%>%
mutate(taxa_lwr = ifelse(taxa_lwr == "non-tany", "non-tanypodinae", taxa_lwr))%>%
mutate(taxa_lwr = str_replace(taxa_lwr, "prosimuliium", "prosimulium"))%>%
mutate(taxa_lwr = str_replace(taxa_lwr, "coloradenis", "coloradensis"))%>%
mutate(taxa_lwr = str_replace(taxa_lwr, "group", ""))%>% #remove "group" from all names
mutate(taxa_lwr = str_replace(taxa_lwr, "sub", "subgroup"))%>% #add "group" back into "subgroup" names
mutate(taxa_lwr = str_trim(taxa_lwr, side = "both")) #remove white space from before and after all names
invert_rename2$taxa_lwr[2587] <- "rhyacophila brunnea/vemna" #manually over-writing one cell that wasn't correcting using fxns above for some reasons
unique(invert_rename2$taxa_lwr) #checking resulting unique values
## [1] "megarcys"
## [2] "acari"
## [3] "agathon"
## [4] "allomyia"
## [5] "alloperla"
## [6] "baetidae"
## [7] "baetis"
## [8] "baetis 2"
## [9] "carabidae"
## [10] "chironomidae"
## [11] "chloroperlidae"
## [12] "chrysopidae"
## [13] "cinygmula"
## [14] "clinocera"
## [15] "collembola"
## [16] "culicoides"
## [17] "dicranota"
## [18] "diptera"
## [19] "dixella"
## [20] "drunella"
## [21] "elmidae"
## [22] "empididae"
## [23] "epeorus"
## [24] "gonomyodes"
## [25] "helodon"
## [26] "hemiptera"
## [27] "heteroceridae"
## [28] "heterocloeon"
## [29] "hirudinea"
## [30] "homophylax"
## [31] "hydrophilidae"
## [32] "lednia"
## [33] "leuctridae"
## [34] "limnephilidae"
## [35] "limonia"
## [36] "lipogomphus"
## [37] "macroveliidae"
## [38] "molophilus"
## [39] "molophilus did not fit well with key"
## [40] "muscidae"
## [41] "nematoda"
## [42] "neothremma"
## [43] "non-tanypodinae"
## [44] "oligochaeta"
## [45] "oreogeton"
## [46] "ormosia"
## [47] "ostracoda"
## [48] "paradelphomyia"
## [49] "parapsyche"
## [50] "pentacora"
## [51] "perlodidae"
## [52] "plecoptera"
## [53] "pomoleuctra"
## [54] "probezzia"
## [55] "prosimulium"
## [56] "rhyacophila brunnea/vemna"
## [57] "psychoglypha"
## [58] "rhithrogena"
## [59] "rhyacophila"
## [60] "rhyacophila alberta"
## [61] "rhyacophila arnaudi"
## [62] "rhyacophila atrata complex"
## [63] "rhyacophila betteni"
## [64] "rhyacophila blarina"
## [65] "rhyacophila brunnea"
## [66] "rhyacophila brunnea vemna group"
## [67] "rhyacophila coloradensis"
## [68] "rhyacophila ecosa"
## [69] "rhyacophila hyalinata banks"
## [70] "rhyacophila hyalinata"
## [71] "rhyacophila rotunda"
## [72] "rhyacophila sibirica"
## [73] "rhyacophila vatina complex"
## [74] "rhyacophila ventina complex"
## [75] "rhyacophila verrula"
## [76] "rhyacophila verrula milne"
## [77] "rhyacophila vetina"
## [78] "rhyacophila viquaea"
## [79] "rhyacophila vobara subgroup"
## [80] "rhyacophila vofixa"
## [81] "rhyacophila vofixa subgroup vobara subgroup"
## [82] "rithrogena"
## [83] "sarcophagidae"
## [84] "simuliidae"
## [85] "simuliidae (unknown)"
## [86] "simulium"
## [87] "skwala"
## [88] "sphaeridiinae"
## [89] "sphaeriidae"
## [90] "staphylinidae"
## [91] "stenelmis"
## [92] "suwallia"
## [93] "sweltsa"
## [94] "tanypodinae"
## [95] "tipula"
## [96] "tipula (arctotipula)"
## [97] "trichoptera"
## [98] "trichoptera homophylax or allomyia"
## [99] "turbellaria"
## [100] "twinnia"
## [101] "yoraperla"
## [102] "zapada"
Now, calculate average density and total abundance for each taxa for each year in each site; add source info:
invert_avgs <- invert_rename2 %>%
group_by(Stream, Year, taxa_lwr) %>% # group by taxa w/in year w/in stream
summarise(density_xbar = mean(Density), #calculate mean density for each taxa/year/site
abundance_total = sum(Abundance))#calculate total abundance for each taxa/yr/site
## `summarise()` has grouped output by 'Stream', 'Year'. You can override using
## the `.groups` argument.
source <- read.csv("source_info.csv")%>% #read in source data
rename(Stream = stream) #rename for merge
invert_avgs <- merge(invert_avgs, source)%>%
filter(!density_xbar == 0)%>% #remove extra taxa names that have no observations
arrange(Stream, Year) #sort by stream & year
Save this file for future analysis:
write.csv(invert_avgs, "invert_data/cleaned_csv/full_invert_densities.csv")
Updates 4/27: Use all sites w/ 2+yrs of data; use log transform of density measurements; remove taxa that only occur at one site; remove taxa that represent <1% of total abundance
Remove sites with only one year of data
First, ID which sites have 2+ yrs of data:
site_lengths <- invert_avgs %>%
group_by(Stream)%>%
summarise(first = min(Year),
last = max(Year))%>%
mutate(monitoring_yrs = (last-first)+1)%>%
arrange(desc(monitoring_yrs))
site_lengths
## # A tibble: 13 × 4
## Stream first last monitoring_yrs
## <chr> <int> <int> <dbl>
## 1 mid_teton 2015 2022 8
## 2 n_teton 2015 2022 8
## 3 s_cascade 2015 2022 8
## 4 s_teton 2015 2022 8
## 5 windcave 2015 2022 8
## 6 paintbrush 2016 2022 7
## 7 s_ak_basin 2016 2022 7
## 8 delta 2017 2022 6
## 9 grizzly 2017 2022 6
## 10 skillet 2017 2022 6
## 11 silver_run 2017 2021 5
## 12 cloudveil 2019 2022 4
## 13 gusher 2020 2022 3
Make a list of site names w/ only one year of data:
one.yr <- site_lengths %>%
filter(monitoring_yrs < 2)%>% #select sites w/ <2yrs data
select(Stream) #remove extra cols
Remove all observations from streams with one year of data:
two_plus <- invert_avgs %>%
filter(!Stream%in%one.yr$Stream) #remove all rows with a value of "Stream" that occurs "in" the one.yr$stream vector created above.
Remove taxa that only occur at one site
Calculate # of different sites @ which each taxa occurs; make a list of all taxa occurring at only one site:
one.site <- two_plus %>%
group_by(taxa_lwr)%>% #group by taxa
summarise(no_sites = length(unique(Stream)))%>% #count # of unique site names w/in each taxa group
arrange(no_sites)%>% #sort by site count
filter(no_sites == 1) #extract all taxa occuring at only one site
Remove these taxa from the main dataframe:
no_single <- two_plus %>%
filter(!taxa_lwr%in%one.site$taxa_lwr)
Remove taxa that account for <1% of total abundance
NOTE - using a 1% threshold for each site/year (ie removing any species that makes up <1% of the total abundance for a given site in a given year).
Calculate total abundance per site per year:
overall.abundance <- no_single %>%
group_by(Stream, Year)%>% #group by stream and year
summarise(tot_abund = sum(abundance_total, na.rm = T)) #calculate total abundance w/in each site + year combo
## `summarise()` has grouped output by 'Stream'. You can override using the
## `.groups` argument.
Calculate % of total abundance for each taxa for each year; remove any taxa w/ <1% of total abundance:
no_rare <- merge(no_single, overall.abundance)%>% #add total abundance for each site + year to main dataset
mutate(ab_perc = (abundance_total/tot_abund)*100)%>% #calculate abundance percent
filter(!ab_perc < 1) #remove all taxa with abundance percent <1
Calculate log transform of remaining density data:
nmds_data <- no_rare %>%
mutate(log10_density = log10(density_xbar))%>%
select(Stream, Year, taxa_lwr, log10_density)
Goal is to run an NMDS treating each stream+year combo as a unique “site” - will need: - Columns with stream name and year (for plotting later) - ID column with stream name + year (to use as the “site” identifier in the NMDS) - Columns for each taxa, rows populated with density for that combo of site/year and taxa
Create site + year index column; pivot wider to get columns for each taxa:
nmds_wide <- nmds_data %>%
pivot_wider(id_cols = c(Stream, Year), values_from = log10_density, names_from = taxa_lwr)%>%
mutate_if(is.numeric, ~replace_na(.,0))
Note - still have some species that only occur @ 1 site in this data after removing taxa that account for <1% of total abundance at each site/yr
Run the NMDS using the metaMDS fuction (vegan pkg):
set.seed(1) #for reproducability
full.mds <- metaMDS(comm = nmds_wide[,3:18], #specify community as columns 4-18
distance = "bray") #use bray-curtis distance calc
## Run 0 stress 0.231354
## Run 1 stress 0.2594342
## Run 2 stress 0.2331558
## Run 3 stress 0.2331558
## Run 4 stress 0.2315119
## ... Procrustes: rmse 0.01424533 max resid 0.1192305
## Run 5 stress 0.2296257
## ... New best solution
## ... Procrustes: rmse 0.0391613 max resid 0.2150159
## Run 6 stress 0.2677292
## Run 7 stress 0.2638564
## Run 8 stress 0.234212
## Run 9 stress 0.2888573
## Run 10 stress 0.2293767
## ... New best solution
## ... Procrustes: rmse 0.005982509 max resid 0.04196518
## Run 11 stress 0.2821153
## Run 12 stress 0.2297398
## ... Procrustes: rmse 0.008697491 max resid 0.04865558
## Run 13 stress 0.2292641
## ... New best solution
## ... Procrustes: rmse 0.006602499 max resid 0.0443646
## Run 14 stress 0.2319655
## Run 15 stress 0.2508373
## Run 16 stress 0.2529171
## Run 17 stress 0.229264
## ... New best solution
## ... Procrustes: rmse 0.000137666 max resid 0.0008967715
## ... Similar to previous best
## Run 18 stress 0.2331558
## Run 19 stress 0.2442452
## Run 20 stress 0.2578503
## *** Best solution repeated 1 times
full.mds
##
## Call:
## metaMDS(comm = nmds_wide[, 3:18], distance = "bray")
##
## global Multidimensional Scaling using monoMDS
##
## Data: nmds_wide[, 3:18]
## Distance: bray
##
## Dimensions: 2
## Stress: 0.229264
## Stress type 1, weak ties
## Best solution was repeated 1 time in 20 tries
## The best solution was from try 17 (random start)
## Scaling: centring, PC rotation, halfchange scaling
## Species: expanded scores based on 'nmds_wide[, 3:18]'
Data set-up: extract point locations for plot creation:
mds_scores <- as.data.frame(scores(full.mds)$site)%>% #extract point locations for each site (stream*year combo)
mutate(stream = nmds_wide$Stream,
year = nmds_wide$Year)#%>%#add columns for stream, and year
#mutate(stream_code = case_when(stream == "mid_teton"~"MT", stream == "n_teton" ~"NT", stream == "s_ak_basin"~"AK", stream == "s_cascade"~"SC", stream == "s_teton" ~ "ST", stream == "windcave" ~ "WC")) #Add "stream_code" column for point labeling
mds_species <- as.data.frame(scores(full.mds)$species)%>% #Extract point locations for each species
rownames_to_column(var = "species") #convert rownames to a column w/species names
Create plots:
first pass: putting everything on one plot:
Add source info:
source <- read.csv("source_info.csv") #read in sources
mds_scores <- merge(mds_scores, source) #merge source and stream code info with scores dataframe
#new dataframe for adding site labels (1 per site):
site_labels <- mds_scores %>%
group_by(stream, stream_code, source) %>% # group by stream
summarise(NMDS1 = mean(NMDS1),
NMDS2 = mean(NMDS2)) %>% #extract NMDS coordinates for first observation for each stream
ungroup() #ungroup
## `summarise()` has grouped output by 'stream', 'stream_code'. You can override
## using the `.groups` argument.
Calculating convex hulls for each group:
# Find the convex hull of the points being plotted
hull <- mds_scores %>%
group_by(stream) %>%
slice(chull(NMDS1, NMDS2))
Plotting:
Color by site; diff shapes = diff years
nmds.site <- ggplot(mds_scores)+
geom_point(data = mds_species, aes(x = NMDS1, y = NMDS2), alpha = 0.5, size = 1)+ #add points for each species, adjust opacity & size
geom_text(data = mds_species, aes(x = NMDS1, y = NMDS2, label = species), #add labels to each taxa point
size = 3, color = "Grey", hjust = 0.5, vjust = -0.5, fontface = "italic")+ #adjusting size, color, position, and face of taxa labels
geom_point(data = mds_scores, aes(x = NMDS1, y = NMDS2, color = stream, shape = as.factor(year)), size = 3)+ #add points for each site (stream+year combo); color code by stream, different shapes for different years.
#geom_text(data = mds_scores, aes(x = NMDS1, y = NMDS2, label = stream_code), #add stream code labels to each site point
# hjust = 0, vjust = -1, fontface = "bold")+ #adjusting position and face of stream code labels
geom_polygon(data = hull, alpha = 0.2, aes(x = NMDS1, y = NMDS2, fill = stream, color = stream))+ #add hulls for each group
annotate(geom = "text", x = -0.6, y = 1.2, size = 4, fontface = "italic", label = paste("Stress: ", round(full.mds$stress, digits = 3)))+ #adding label to plot with stress score from model
theme_classic()+
theme(legend.position = "right")+
labs(color = "Stream", shape = "Year")+
scale_color_manual(values = c(brewer.pal(12, "Set3"), "#000000"), name = "Stream"
#, labels = c("Middle Teton", "North Teton","AK Basin S", "South Cascade", "South Teton", "Wind Cave")
)+ #adjusting color palatte and labels
scale_shape_manual(values = c(15, 16, 17, 3, 4, 18, 6, 7))+ #setting shape palatte
scale_fill_manual(values = c(brewer.pal(12, "Set3"), "#000000"), name = "Stream")+ #setting fill pallate
labs(fill = "Stream", color = "Stream", shape = "Year")
nmds.site
ggsave("invert_data/plots/nmds_site.pdf", nmds.site, device = "pdf", width = 6.5, height = 5, units = "in", dpi = "retina")
Color by source; shape = year; label with stream codes
source.pal <- c("#C6DBEF" , "#4292C6" , "#08306B", "#000000")
nmds.source <- ggplot(mds_scores)+
geom_point(data = mds_species, aes(x = NMDS1, y = NMDS2), alpha = 0.5, size = 1)+ #add points for each species, adjust opacity & size
geom_text(data = mds_species, aes(x = NMDS1, y = NMDS2, label = species), #add labels to each taxa point
size = 3, color = "Grey", hjust = 0.5, vjust = -0.5, fontface = "italic")+ #adjusting size, color, position, and face of taxa labels
geom_point(data = mds_scores, aes(x = NMDS1, y = NMDS2, color = source, shape = as.factor(year)), size = 3)+ #add points for each site; color code by source, different shapes for different years.
geom_text(data = site_labels, aes(x = NMDS1, y = NMDS2, label = stream_code, color = source), #add stream code labels to each site point
hjust = 0, vjust = 0, fontface = "bold")+ #adjusting position and face of stream code labels
geom_polygon(data = hull, alpha = 0.2, aes(x = NMDS1, y = NMDS2, fill = source, color = source))+ #add hulls for each stream, color coded by source
annotate(geom = "text", x = -0.6, y = 1.2, size = 4, fontface = "italic", label = paste("Stress: ", round(full.mds$stress, digits = 3)))+ #adding label to plot with stress score from model
theme_classic()+
theme(legend.position = "right")+
labs(fill = "Source", color = "Source", shape = "Year")+
scale_color_manual(values = source.pal, name = "Source"
, labels = c("Glacier", "Snowmelt", "Subterranean Ice", "Unknown")
)+ #adjusting color palatte and labels
scale_shape_manual(values = c(15, 16, 17, 3, 4, 18, 6, 7))+ #setting shape palatte
scale_fill_manual(values = source.pal, name = "Source", labels = c("Glacier", "Snowmelt", "Subterranean Ice", "Unknown")) #setting fill pallate
nmds.source
ggsave("invert_data/plots/nmds_source.pdf", nmds.source, device = "pdf", width = 9.5, height = 6.5, units = "in", dpi = "retina")
Facet by source:
source.facet <- ggplot(mds_scores)+
geom_point(data = mds_species, aes(x = NMDS1, y = NMDS2), alpha = 0.5, size = 1)+ #add points for each species, adjust opacity & size
geom_text(data = mds_species, aes(x = NMDS1, y = NMDS2, label = species), #add labels to each taxa point
size = 3, color = "Grey", hjust = 0.5, vjust = -0.5, fontface = "italic")+ #adjusting size, color, position, and face of taxa labels
geom_point(data = mds_scores, aes(x = NMDS1, y = NMDS2, color = source, shape = as.factor(year)), size = 3)+ #add points for each site; color code by source, different shapes for different years.
geom_text(data = site_labels, aes(x = NMDS1, y = NMDS2, label = stream_code), #add stream code labels to each site point
hjust = 0, vjust =0, fontface = "bold")+ #adjusting position and face of stream code labels
geom_polygon(data = hull, alpha = 0.2, aes(x = NMDS1, y = NMDS2, fill = source, color = source))+ #add hulls for each stream, color coded by source
theme_classic()+
theme(legend.position = "right",
plot.tag.position = c(0.87, 0.1),
plot.tag = element_text(size = 10, face = "italic"))+
labs(fill = "Source", color = "Source", shape = "Year", tag = paste("Stress: ", round(full.mds$stress, digits = 3)))+
scale_color_manual(values = source.pal, name = "Source",labels = c("Glacier", "Snowmelt", "Subterranian Ice", "Unknown"))+ #adjusting color palatte and labels
scale_shape_manual(values = c(15, 16, 17, 3, 4, 18, 6, 7))+ #setting shape palatte
scale_fill_manual(values = source.pal, labels = c("Glacier", "Snowmelt", "Subterranian Ice", "Unknown"),name = "Source")+ #setting fill pallate
facet_wrap(~source)
source.facet
ggsave("invert_data/plots/nmds_source_facet.pdf", source.facet, device = "pdf", units = "in", height = 5, width = 6.5, dpi = "retina")
Same as above, but only TASR Sites
Data Prep:
tasr.sites <- c("cloudveil", "delta", "grizzly", "gusher", "mid_teton", "n_teton", "paintbrush", "s_ak_basin", "s_cascade", "s_teton", "skillet", "windcave") #make list of TASR sites
tasr_nmds <- mds_scores %>%
filter(stream%in%tasr.sites) #extract data for TASR sites from mds_scores data
tasr_labels <- site_labels%>%
filter(stream%in%tasr.sites) #extract TASR site labels for plotting
pres.pal <- c("#C26ED6", "#F89225", "#76D96F") #matching color pallate to site map
Plotting:
tasr.facet <- ggplot(tasr_nmds)+
geom_point(data = mds_species, aes(x = NMDS1, y = NMDS2), alpha = 0.5, size = 1)+ #add points for each species, adjust opacity & size
geom_text(data = mds_species, aes(x = NMDS1, y = NMDS2, label = species), #add labels to each taxa point
size = 3, color = "Grey", hjust = 0.5, vjust = -0.5, fontface = "italic")+ #adjusting size, color, position, and face of taxa labels
geom_point(data = tasr_nmds, aes(x = NMDS1, y = NMDS2, color = source, shape = as.factor(year)), size = 3)+ #add points for each site; color code by source, different shapes for different years.
geom_text(data = tasr_labels, aes(x = NMDS1, y = NMDS2, label = stream_code), #add stream code labels to each site point
hjust = 0, vjust =0, fontface = "bold")+ #adjusting position and face of stream code labels
geom_polygon(data = hull, alpha = 0.2, aes(x = NMDS1, y = NMDS2, fill = source, color = source))+ #add hulls for each stream, color coded by source
theme_classic()+
theme(legend.position = "right",
plot.tag.position = c(0.87, 0.1),
plot.tag = element_text(size = 10, face = "italic"),
axis.title = element_text(size = 11, face = "bold"),
axis.text = element_text(size = 10),
legend.text = element_text(size = 10),
legend.title = element_text(size = 11, face = "bold"),
strip.text = element_text(size = 10, face = "bold"))+
labs(fill = "Source", color = "Source", shape = "Year", tag = paste("Stress: ", round(full.mds$stress, digits = 3)))+
scale_color_manual(values = pres.pal, name = "Source",labels = c("Glacier", "Snowmelt", "Subterranian Ice", "Unknown"))+ #adjusting color palatte and labels
scale_shape_manual(values = c(15, 16, 17, 3, 4, 18, 6, 7))+ #setting shape palatte
scale_fill_manual(values = pres.pal, labels = c("Glacier", "Snowmelt", "Subterranian Ice", "Unknown"),name = "Source")+ #setting fill pallate
facet_wrap(~source)
tasr.facet
Save:
ggsave("pres_figures/tasr_nmds.pdf", tasr.facet, width = 8.5, height = 6.5, units = "in", device = "pdf", dpi = "retina")
Seperating into facets for each site
nmds.facet <- ggplot(mds_scores)+
#geom_point(data = mds_species, aes(x = NMDS1, y = NMDS2), alpha = 0.5, size = 1)+
#geom_text(data = mds_species, aes(x = NMDS1, y = NMDS2, label = species), size = 3, color = "Grey", hjust = 0.5, vjust = -0.5, fontface = "italic")+
geom_point(aes(x = NMDS1, y = NMDS2, color = as.numeric(year)), size = 2)+
geom_path(mapping = aes(x = NMDS1, y = NMDS2, color = as.numeric(mds_scores$year)), arrow = arrow(type = "closed", ends = "last", length = unit(0.2, "cm")))+
theme_classic()+
facet_wrap(~stream)+
theme(legend.direction = "horizontal",
legend.position = c(0.75, 0.05),
plot.tag.position = c(0.5, 0.1),
plot.tag = element_text(size = 10, face = "italic"),
axis.title = element_text(size = 11, face = "bold"),
axis.text = element_text(size = 10),
legend.text = element_text(size = 10),
legend.title = element_text(size = 11, face = "bold"),
strip.text = element_text(size = 10, face = "bold"))+
labs(color = "Year", tag = paste("Stress: ", round(full.mds$stress, digits = 3)))+
scale_color_gradient(low = "#C6DBEF", high = "#084594", labels = c(2015, 2018, 2021), breaks = c(2015, 2018, 2021))
nmds.facet
## Warning: Use of `mds_scores$year` is discouraged. Use `year` instead.
ggsave("invert_data/plots/nmds_facet.pdf", nmds.facet, device = "pdf", width = 6.5, height = 6.5, units = "in", dpi = "retina")
## Warning: Use of `mds_scores$year` is discouraged. Use `year` instead.
Same as above, but just for TASR sites:*
tasr.nmds.line <- ggplot(tasr_nmds)+
#geom_point(data = mds_species, aes(x = NMDS1, y = NMDS2), alpha = 0.5, size = 1)+
#geom_text(data = mds_species, aes(x = NMDS1, y = NMDS2, label = species), size = 3, color = "Grey", hjust = 0.5, vjust = -0.5, fontface = "italic")+
geom_point(aes(x = NMDS1, y = NMDS2, color = as.numeric(year)), size = 2)+
geom_path(mapping = aes(x = NMDS1, y = NMDS2, color = as.numeric(tasr_nmds$year)), arrow = arrow(type = "closed", ends = "last", length = unit(0.2, "cm")))+
theme_classic()+
facet_wrap(~full_name, nrow = 2, ncol = 6)+
theme(legend.direction = "horizontal",
legend.position = "bottom",
plot.tag.position = c(0.3, 0.1),
plot.tag = element_text(size = 10, face = "italic"),
axis.title = element_text(size = 11, face = "bold"),
axis.text = element_text(size = 10),
legend.text = element_text(size = 10),
legend.title = element_text(size = 11, face = "bold"),
strip.text = element_text(size = 10, face = "bold"))+
labs(color = "Year", tag = paste("Stress: ", round(full.mds$stress, digits = 3)))+
scale_color_gradient(low = "#C6DBEF", high = "#084594", labels = c(2015, 2018, 2021), breaks = c(2015, 2018, 2021))
tasr.nmds.line
## Warning: Use of `tasr_nmds$year` is discouraged. Use `year` instead.
ggsave("pres_figures/tasr_nmds_line.pdf", tasr.nmds.line, width = 12.5, height = 5, units = "in", device = "pdf", dpi = "retina")
## Warning: Use of `tasr_nmds$year` is discouraged. Use `year` instead.
Just plotting first and last year for each site
nmds.simple <- ggplot(filter(mds_scores, year == 2016|year == 2021))+ #filtering data to only include 2016 & 2021
geom_point(data = mds_species, aes(x = NMDS1, y = NMDS2), alpha = 0.5, size = 1)+
geom_text(data = mds_species, aes(x = NMDS1, y = NMDS2, label = species), size = 3, color = "Grey", hjust = 0.5, vjust = -0.5, fontface = "italic")+
geom_point(data = filter(mds_scores, year == 2016|year == 2021), aes(x = NMDS1, y = NMDS2, color = stream, shape = as.factor(year)), size = 3)+
geom_text(data = filter(mds_scores, year == 2016|year == 2021), aes(x = NMDS1, y = NMDS2, label = stream_code), hjust = -0.3, vjust = 0.1, fontface = "bold")+
annotate(geom = "text", x = -0.7, y = 0.9, size = 4, fontface = "italic", label = paste("Stress: ", round(full.mds$stress, digits = 3)))+
theme_classic()+
theme(legend.position = "right")+
labs(color = "Stream", shape = "Year")+
scale_color_manual(values = c(brewer.pal(12, "Set3"), "#000000")
# , labels = c("Middle Teton", "North Teton","AK Basin S", "South Cascade", "South Teton", "Wind Cave")
)+
scale_shape_manual(values = c(15, 16))
nmds.simple
Save:
ggsave("invert_data/plots/nmds_simple.jpg", nmds.simple, device = "jpeg", width = 6.5, height = 5, units = "in", dpi = "retina") #Save plot
First, need to calculate area of convex hulls, which will require converting them to polygons. Formatting-wise, this means we’ll need to repeat the first row for each site at the end of that site to “close” the polygon:
Split data by site:
hull_split <- split(hull, hull$stream)
Write a function to paste first row at end of dataframe and extract the hull coordinates as a matrix; apply to the split list:
paste.row <- function(df){
df<-rbind(df, df[1,]) #rowbind first row to bottom of dataframe
df<-df %>%
select(2:3)
}
hull_split_closed <- lapply(hull_split, paste.row) #apply to hull_split list
## Adding missing grouping variables: `stream`
## Adding missing grouping variables: `stream`
## Adding missing grouping variables: `stream`
## Adding missing grouping variables: `stream`
## Adding missing grouping variables: `stream`
## Adding missing grouping variables: `stream`
## Adding missing grouping variables: `stream`
## Adding missing grouping variables: `stream`
## Adding missing grouping variables: `stream`
## Adding missing grouping variables: `stream`
## Adding missing grouping variables: `stream`
## Adding missing grouping variables: `stream`
## Adding missing grouping variables: `stream`
Next, calculate area of each site’s polygon using a for loop:
hull_areas<-c() #initialize empty vector to recieve outputs
for(i in 1:length(hull_split_closed)){ #for each object in the hull_split_closed list:
pg <- Polygon(hull_split_closed[[i]][,2:3], hole = F) #use columns 2 and 3 (x and y coords) to create a polygon
hull_areas[i] <- pg@area #calculate area of that polygon and store in the hull_areas vector
}
## Warning in Polygon(hull_split_closed[[i]][, 2:3], hole = F): less than 4
## coordinates in polygon
Add site info back in:
site_areas <- hull %>%
select(stream, source, stream_code)%>% #select desired cols
distinct(stream, source, stream_code) #remove duplicate rows
site_areas$area <- hull_areas #add areas
site_areas <- site_areas %>%
filter(!area == 0) #remove sites with area = 0
Plotting - how do areas compare between sources?
source.areas <- ggplot(site_areas, aes(x = source, y = area, fill = source))+
geom_boxplot()+
theme_classic()+
theme(legend.position = "none",
axis.text = element_text(size = 11),
axis.title= element_text(size = 12, face = "bold"))+
scale_fill_manual(values = source.pal, labels = c("Glacier", "Snowmelt", "Subterranean Ice"))+
labs(y = "Convex Hull Area", x = "Stream Source")
source.areas
Snowmelt appears to have larger area than glacier or subterranean ice - are these differences significant?
Save:
ggsave("invert_data/plots/source_areas.pdf", source.areas, width = 6.5, height = 5, units = "in", device = "pdf", dpi = "retina")
Basic ANOVA:
source.aov <- aov(area~source, data = site_areas)
summary(source.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## source 2 0.1767 0.08834 1.575 0.259
## Residuals 9 0.5047 0.05608
No effect of source
TukeyHSD(source.aov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = area ~ source, data = site_areas)
##
## $source
## diff lwr upr p adj
## snowmelt-glacier 0.23870366 -0.2288277 0.7062350 0.3690835
## sub_ice-glacier -0.03401843 -0.5015498 0.4335129 0.9775617
## sub_ice-snowmelt -0.27272209 -0.7402534 0.1948092 0.2834387
No significant differences after adding 2022 data
First, need to calculate richness (#species present) for each site - NOTE: using ALL taxa from sites with >1yr data here, NOT the subset of taxa used for NMDS.
richness <- invert_avgs %>%
filter(!Stream%in%one.yr$Stream)%>%
group_by(Stream, Year, source, full_name)%>% #group by stream/year
summarise(richness = length(unique(taxa_lwr))) #calculate # of unique values of taxa_lwr for each stream + year combo
## `summarise()` has grouped output by 'Stream', 'Year', 'source'. You can
## override using the `.groups` argument.
head(richness
)
## # A tibble: 6 × 5
## # Groups: Stream, Year, source [6]
## Stream Year source full_name richness
## <chr> <int> <chr> <chr> <int>
## 1 cloudveil 2019 glacier Cloudveil 5
## 2 cloudveil 2020 glacier Cloudveil 5
## 3 cloudveil 2021 glacier Cloudveil 8
## 4 cloudveil 2022 glacier Cloudveil 13
## 5 delta 2017 glacier Delta 14
## 6 delta 2018 glacier Delta 14
Save file for future analysis:
write.csv(richness, "invert_data/cleaned_csv/invert_richness.csv")
Plotting - how has richness changed over time at all of these sites?
richness.line <- ggplot(richness, aes(x = Year, y = richness, color = Stream))+
geom_point()+
geom_line()+
geom_smooth(se = F, method = "lm", linetype = "dashed", size = 0.5)+
theme_classic()+
labs(x = "Year", y = "Species Richness", color = "Stream")+
scale_color_manual(values = c(brewer.pal(12, "Set3"), "#000000")
#, labels = c("Middle Teton", "North Teton", "AK Basin S", "S Cascade", "S Teton", "Wind Cave")
)
richness.line
## `geom_smooth()` using formula 'y ~ x'
Appears to be a decreasing trend in richness across most sites - are any of these trends significant?
(save this plot)
ggsave("invert_data/plots/richness_line.pdf", richness.line, device = "pdf", height = 4, width = 6.5, units = "in", dpi = "retina")
## `geom_smooth()` using formula 'y ~ x'
First, nest data based on site:
richness_nested <- richness %>%
nest(data = -Stream) #create new nested dataframe with one column for Stream, one column for data - data entries for each site include year and richness.
Next, run a model for each site separately and return a summary table with the results for each site:
richness_models <- richness_nested %>%
mutate(model = map(data, ~lm(richness~Year, data = .)%>% #run a model of richness~year for each site (referenced by "."); store results in new column called "model"
tidy))%>% #tidy function to construct a summary table with model results (seperate tables for each site)
unnest(model) %>% #unnest model column to get rows for each site, columns for model metrics
filter(term == "Year") #remove intercept term to just get info on year for each site
richness_models
## # A tibble: 13 × 7
## # Groups: Stream [13]
## Stream data term estimate std.error statistic p.value
## <chr> <list> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 cloudveil <gropd_df [4 × 4]> Year 2.7 0.794 3.40 0.0766
## 2 delta <gropd_df [6 × 4]> Year -0.714 0.654 -1.09 0.336
## 3 grizzly <gropd_df [6 × 4]> Year 0.457 0.556 0.822 0.458
## 4 gusher <gropd_df [3 × 4]> Year 2.50 2.60 0.962 0.512
## 5 mid_teton <gropd_df [8 × 4]> Year -0.429 0.397 -1.08 0.322
## 6 n_teton <gropd_df [8 × 4]> Year -0.774 0.794 -0.975 0.367
## 7 paintbrush <gropd_df [6 × 4]> Year -0.286 0.379 -0.753 0.493
## 8 s_ak_basin <gropd_df [7 × 4]> Year -0.893 0.724 -1.23 0.272
## 9 s_cascade <gropd_df [7 × 4]> Year -2.28 0.756 -3.02 0.0294
## 10 s_teton <gropd_df [8 × 4]> Year -0.774 0.717 -1.08 0.322
## 11 silver_run <gropd_df [2 × 4]> Year 1.00 NaN NaN NaN
## 12 skillet <gropd_df [6 × 4]> Year 0.343 0.845 0.406 0.706
## 13 windcave <gropd_df [8 × 4]> Year -0.464 0.314 -1.48 0.190
Significant declines (p<0.05) in richness at s cascade, weakly significant (p<0.1) decreases at Cloudveil.
Richness at 12 TASR sties, add trendline for overall average
Data prep:
tasr.sites <- c("cloudveil", "delta", "grizzly", "gusher", "mid_teton", "n_teton", "paintbrush", "s_ak_basin", "s_cascade", "s_teton", "skillet", "windcave") #make list of TASR sites
tasr_richness <- richness %>%
filter(Stream%in%tasr.sites) #remove non-TASR sites
Plotting:
tasr.richness <- ggplot(tasr_richness, aes(x = Year, y = richness, color = full_name))+
geom_point(alpha = 0.5)+
geom_line(alpha = 0.5)+
geom_line(stat = "smooth", method = "lm", formula = y~x, linetype = "dashed", size = 0.5, alpha = 0.5)+
geom_smooth(aes(x = Year, y = richness), se = F, method = "lm", linetype = "dashed", size = 1, color = "Black")+
theme_classic()+
labs(x = "Year", y = "Species Richness", color = "Stream")+
scale_color_manual(values = c(brewer.pal(8, "Dark2"), brewer.pal(4, "Set1")))+
theme(axis.title = element_text(size = 11, face = "bold"),
axis.text = element_text(size = 10),
legend.text = element_text(size = 10),
legend.title = element_text(size = 11, face = "bold"))+
ylim(0, 32)
tasr.richness
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 row(s) containing missing values (geom_path).
Save:
ggsave("pres_figures/tasr_richness.pdf", tasr.richness, width = 12, height = 5, units = "in", device = "pdf", dpi = "retina")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 row(s) containing missing values (geom_path).
Calculate total richness (no taxa) observed across all sites for each year NOTE: using ALL taxa from sites with >1yr data here, NOT the subset of taxa used for NMDS.
yearly_richness <- invert_avgs %>%
filter(!Stream%in%one.yr$Stream)%>%
group_by(Year)%>%
summarise(total_richness = length(unique(taxa_lwr)))
Plotting:
yearly.richness <- ggplot(yearly_richness, aes(x = Year, y = total_richness, color = as.numeric(Year)))+
geom_point()+
geom_line()+
geom_smooth(aes(x = Year, y= total_richness, color = as.numeric(Year)), se = F, method = "lm", linetype = "dashed", size = 0.5)+
theme_classic()+
labs(x = "Year", y = "Species Richness", color = "Year")+
scale_color_gradient(low = "#C6DBEF", high = "#084594", labels = c(2015, 2018, 2021), breaks = c(2015, 2018, 2021))+
theme(legend.position = "none")
yearly.richness
## `geom_smooth()` using formula 'y ~ x'
Save:
ggsave("invert_data/plots/yearly_richness.pdf", yearly.richness, width = 6.5, height = 4, units = "in", dpi = "retina")
## `geom_smooth()` using formula 'y ~ x'
Basic LM:
yr.rich <- lm(total_richness~Year, data = yearly_richness)
summary(yr.rich)
##
## Call:
## lm(formula = total_richness ~ Year, data = yearly_richness)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.869 -3.429 1.250 2.417 10.369
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3516.655 2070.376 -1.699 0.140
## Year 1.762 1.026 1.718 0.137
##
## Residual standard error: 6.647 on 6 degrees of freedom
## Multiple R-squared: 0.3297, Adjusted R-squared: 0.2179
## F-statistic: 2.951 on 1 and 6 DF, p-value: 0.1366
No significant trend.
First, extract paintbrush data:
pb_inverts <- richness %>%
filter(Stream == "paintbrush")
Read in paintbrush exposure dates (saved from teton_data_cleaning script); calculate no days exposed per year:
pb_exposure <- read.csv("Temperature/pb_exposure.csv")%>%
group_by(yr)%>%
summarise(no_days = length(exposed_dates))%>%
rename(Year = yr)
Merge with invert data:
pb_full <- merge(pb_inverts, pb_exposure, all = T)%>%
# mutate(no_days = ifelse(Year == 2017, 0, no_days))%>% #replacing 2017 "NA" with 0 dry days
filter(!is.na(richness), !is.na(no_days))
Visual check for trends:
pb.richness <- ggplot(pb_full, aes(x = no_days, y = richness))+
geom_point()+
geom_line()+
geom_smooth(method = "lm", se = F, linetype = "dashed")+
theme_classic()+
labs(x = "Number dry days", y = "Taxonomic Richness")
pb.richness
## `geom_smooth()` using formula 'y ~ x'
Fairly strong negative trend, but very little data - so hard to tell if that’s a “real” trend.
ggsave("invert_data/plots/pb_richness.pdf", pb.richness, device = "pdf", width = 6.5, height = 4, units = "in", dpi = "retina")
## `geom_smooth()` using formula 'y ~ x'
Basic LM:
pb.mod <- lm(richness~no_days, data = pb_full)
summary(pb.mod)
##
## Call:
## lm(formula = richness ~ no_days, data = pb_full)
##
## Residuals:
## 1 2 3
## -0.1203 -0.2707 0.3910
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.05263 2.44739 5.333 0.118
## no_days -0.16541 0.05209 -3.175 0.194
##
## Residual standard error: 0.4905 on 1 degrees of freedom
## Multiple R-squared: 0.9098, Adjusted R-squared: 0.8195
## F-statistic: 10.08 on 1 and 1 DF, p-value: 0.1942
No significant trends, which is unsurprising bc of the very limited data - trend may emerge with longer term dataset. R2 is pretty high.
First, re-arrange data to get list of all unique taxa for each year:
yr_taxa <- invert_avgs %>%
select(Year, taxa_lwr)%>% #select year and taxa columns
group_by(Year)%>% #group by year
summarise(taxa = unique(taxa_lwr))%>% #extract all unique values of taxa for each year
mutate(taxa_yr = paste(Year, taxa, sep = "_"))%>% #add new col with taxa and year
pivot_wider(names_from = Year, values_from = taxa_yr)%>% #pivot wider to get columns for each year, rows for each taxa - yearly columns will have NA's when a taxa isn't present.
arrange(taxa)
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
Next, convert to binary “1” for present, “0” for absent:
to.binary <- function(col){
col = ifelse(is.na(col), 0, 1) #write function to convert replace all NA's with 0; all other values with 1
}
yr_binary <- yr_taxa %>%
mutate(across(.cols = c(2:9), to.binary)) #apply this function across all year columns in yr_taxa
Check the resulting output:
head(yr_binary)
## # A tibble: 6 × 9
## taxa `2015` `2016` `2017` `2018` `2019` `2020` `2021` `2022`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 acari 1 1 1 1 1 1 1 1
## 2 agathon 0 0 1 0 1 1 1 1
## 3 allomyia 1 1 1 1 1 1 1 1
## 4 alloperla 1 1 0 0 0 0 0 0
## 5 baetidae 1 0 0 0 0 0 0 0
## 6 baetis 0 1 1 1 1 1 1 1
Want to calculate the cumulative richness over time - i.e. only count taxa that haven’t been seen in previous years. Based on the above format, need to keep ONLY the first observation of each taxa, remove all others.
First, re-format data to get rows for each year, columns for each taxa, fill rows with binary presence/absence data:
binary_wide <- yr_binary%>%
pivot_longer(!taxa, values_to = "presence", names_to = "yr")%>% #first pivot longer to get taxa, presence, and year columns
pivot_wider(id_cols = yr, names_from = taxa, values_from = presence) #then pivot wider using year as ID col instead of taxa to get cols for each taxa, fill in with presence/absence data
Next, need to replace all subsequent observations with a 0 to get a dataframe that has a 1 for a given taxa in the year that that taxa first ocurred, 0 in all other rows for that taxa.
apperance.calc <- function(x){ #write a function to:
x = cumsum(x) #a. calculate the cumulative sum of each column
x = ifelse(x == 1 &lag(x) == 0, 1, 0) #b. replace all cells that are not = 1 and don't have 0 in the previous cell with 0's.
}
binary_lag <- binary_wide %>%
mutate(across(c(2:ncol(binary_wide)), apperance.calc))#apply to all taxa cols
binary_lag[1,]<-binary_wide[1,] #replace first row (will be all NAs) with first row from original wide dataframe
Now, calculate new taxa observed in each year by calculating row sums for all taxa columns:
new_taxa <- rowSums(binary_lag[,2:ncol(binary_lag)])
Add this to a new dataframe with year; calculate cumulative taxa observed each year:
cumulative_richness <- data.frame(c(2015:2022), new_taxa)%>% #bind new_taxa object with vector of years
rename(yr = 1)%>% #rename first column "yr"
mutate(cumulative_taxa =cumsum(new_taxa)) #calculate cumulative taxa observed for each year
Plot:
cum.taxa <- ggplot(cumulative_richness, aes(x = yr, y = cumulative_taxa))+ #plot x = year, y = cumulative taxa
geom_point()+
geom_line()+
theme_classic()+
labs(x = "Year", y = "Cumulative Taxa Observed")
cum.taxa
Save:
ggsave("invert_data/plots/cumulative_taxa.pdf", cum.taxa, width = 6.5, height = 4, units = "in", device = "pdf", dpi = "retina")
LM:
cum.taxa.lm <- lm(cumulative_taxa~yr, data = cumulative_richness)
summary(cum.taxa.lm)
##
## Call:
## lm(formula = cumulative_taxa ~ yr, data = cumulative_richness)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.7381 -0.5208 0.4881 1.7381 3.4881
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.764e+04 1.031e+03 -17.11 2.55e-06 ***
## yr 8.774e+00 5.107e-01 17.18 2.49e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.31 on 6 degrees of freedom
## Multiple R-squared: 0.9801, Adjusted R-squared: 0.9768
## F-statistic: 295.1 on 1 and 6 DF, p-value: 2.49e-06
Significant increase in cumulative richness over time, but that could be a side effect of having data from more sites in later years.
First, read in cleaned temperature dataset:
temps <- read.csv("Temperature/cleaned_full_datasets/temps_daily.csv")
head(temps)
## X site date1 temp_xbar temp_max temp_min lat long
## 1 1 cloudveil 2019-08-02 2.185000 2.503 1.886 43.72551 -110.7961
## 2 2 cloudveil 2019-08-03 2.305583 2.690 1.994 43.72551 -110.7961
## 3 3 cloudveil 2019-08-04 2.352583 2.717 2.101 43.72551 -110.7961
## 4 4 cloudveil 2019-08-05 2.406000 2.797 2.047 43.72551 -110.7961
## 5 5 cloudveil 2019-08-06 2.439458 2.850 2.128 43.72551 -110.7961
## 6 6 cloudveil 2019-08-07 2.497458 2.877 2.262 43.72551 -110.7961
## temp_change
## 1 0.617
## 2 0.696
## 3 0.616
## 4 0.750
## 5 0.722
## 6 0.615
Next, calculate mean summer (July-Aug) stream temperature for each year at each site:
summer_temps <- temps %>%
mutate(date1 = ymd(date1), #recode date as r date obj
mo = month(date1), #new col with month
yr = year(date1))%>% #new col with year
filter(mo == 6 | mo == 7 | mo ==8)%>% #extract rows with month = 6, 7, or 8
group_by(site, yr)%>% #group by site and year
summarise(t_xbar = mean(temp_xbar, na.rm = T), #calculate mean summer temp
tmax_xbar = mean(temp_max, na.rm = T), #calculate avg max temp
tmin_xbar = mean(temp_min, na.rm = T)) %>%#calculate avg min temp
rename(Stream = site, Year = yr) #renaming for merge
## `summarise()` has grouped output by 'site'. You can override using the
## `.groups` argument.
Next, merge with richness dataset:
richness_temps <- merge(summer_temps, richness)
Visual comparison: any visible relationships between richness, mean summer temp, mean max or mean min temp?
Mean summer temp:
txbar.richness <- ggplot(filter(richness_temps, !Stream == "silver_run"), aes(x = t_xbar, y = richness, color = Stream))+
geom_point()+
geom_line()+
theme_classic()+
facet_wrap(~Stream, scales = "free_x")+
theme(legend.position = "none")+
labs(x = "Mean summer (June-Aug.) stream temperature, C", y = "Taxonomic Richness")
txbar.richness
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 row(s) containing missing values (geom_path).
Mean summer max temp:
tmax.richness <- ggplot(filter(richness_temps, !Stream == "silver_run"), aes(x = tmax_xbar, y = richness, color = Stream))+
geom_point()+
geom_smooth(se = F, linetype = "dashed", method = "lm")+
theme_classic()+
facet_wrap(~Stream, scales = "free_x")+
theme(legend.position = "none")+
labs(x = "Mean maximum summer (June-Aug.) stream temperature, C", y = "Taxonomic Richness")
tmax.richness
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing missing values (geom_point).
Mean summer min temp:
tmin.richness <- ggplot(filter(richness_temps, !Stream == "silver_run"), aes(x = tmin_xbar, y = richness, color = Stream))+
geom_point()+
geom_smooth(se = F, linetype = "dashed", method = "lm")+
theme_classic()+
facet_wrap(~Stream, scales = "free_x")+
theme(legend.position = "none")+
labs(x = "Mean minimum summer (June-Aug.) stream temperature, C", y = "Taxonomic Richness")
tmin.richness
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing missing values (geom_point).
No obvious relationships between summer temps and richness within sites - what about across sites?
st.richness.all <- ggplot(richness_temps, aes(x = t_xbar, y = richness))+
geom_point()+
geom_smooth(se = F, linetype = "dashed", method = "lm")+
theme_classic()+
theme(legend.position = "none")+
labs(x = "Mean summer (June-Aug.) stream temperature, C", y = "Taxonomic Richness")
st.richness.all
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing missing values (geom_point).
No obvious trends there either.
First, nest data based on site:
rt_nested <- richness_temps %>%
nest(data = -Stream) #create new nested dataframe with one column for Stream, one column for data - data entries for each site include year and richness.
Next, run a model of richness ~ mean temp for each site separately and return a summary table with the results for each site:
rt_models <- rt_nested %>%
mutate(model = map(data, ~lm(richness~t_xbar, data = .)%>% #run a model of richness~year for each site (referenced by "."); store results in new column called "model"
tidy))%>% #tidy function to construct a summary table with model results (seperate tables for each site)
unnest(model) %>% #unnest model column to get rows for each site, columns for model metrics
filter(term == "t_xbar") #remove intercept term to just get info on year for each site
rt_models
## # A tibble: 13 × 7
## Stream data term estimate std.error statistic p.value
## <chr> <list> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 cloudveil <tibble [4 × 7]> t_xbar -2.20 4.23 -0.522 0.654
## 2 delta <tibble [6 × 7]> t_xbar -28.6 13.8 -2.07 0.107
## 3 grizzly <tibble [5 × 7]> t_xbar -1.57 0.247 -6.36 0.00785
## 4 gusher <tibble [2 × 7]> t_xbar 3.52 NaN NaN NaN
## 5 mid_teton <tibble [8 × 7]> t_xbar 0.0726 3.63 0.0200 0.985
## 6 n_teton <tibble [8 × 7]> t_xbar 1.50 0.794 1.89 0.108
## 7 paintbrush <tibble [5 × 7]> t_xbar -0.584 0.215 -2.72 0.0728
## 8 s_ak_basin <tibble [7 × 7]> t_xbar 1.42 3.90 0.363 0.735
## 9 s_cascade <tibble [7 × 7]> t_xbar 3.85 3.91 0.985 0.397
## 10 s_teton <tibble [6 × 7]> t_xbar -1.01 1.20 -0.844 0.446
## 11 silver_run <tibble [2 × 7]> t_xbar -14.5 NaN NaN NaN
## 12 skillet <tibble [5 × 7]> t_xbar -0.526 2.53 -0.208 0.849
## 13 windcave <tibble [7 × 7]> t_xbar -0.915 3.20 -0.286 0.789
Significant (-) relationship between mean summer temp and richness at Grizzly, marginally significant (-) relationship at Paintbrush and N teton.
First, read in SNOTEL data, reformat:
snotel <- read.csv("teton_snotel.csv")%>%
mutate(date = mdy(date), #recode date as date format
yr = year(date))%>% #new column with year
select(date, yr, station_name, swe_cm, snowdepth_cm) #select desired columns
Next, extract maximum SWE and snow depth for each year:
snow_max <- snotel %>%
group_by(station_name, yr)%>% #group by station name & year
summarise(swe_max = max(swe_cm, na.rm = T) #calculate max SWE
)%>%
rename(Year = yr)%>% #rename year col for merge
pivot_wider(id_cols = Year, names_from = station_name, values_from = swe_max)%>% #pivot wider to get seperate cols for each station
rename(targhee_swe = 2, phillips_swe = 3) #rename swe columns
## `summarise()` has grouped output by 'station_name'. You can override using the
## `.groups` argument.
Merge with richness dataset:
richness_swe <- merge(richness, snow_max)%>%
arrange(Stream)
Visual check for trends:
Targhee:
targhee.richness <- ggplot(filter(richness_swe, !Stream == "silver_run"), aes(x = targhee_swe, y = richness, color = Stream))+
geom_point()+
geom_smooth(se = F, linetype = "dashed", method = "lm")+
theme_classic()+
facet_wrap(~Stream)+
theme(legend.position = "none")+
labs(x = "Maximum SWE, Grand Targhee SNOTEL Station", y = "Taxonomic Richness")
targhee.richness
## `geom_smooth()` using formula 'y ~ x'
First, nest data based on site:
rswe_nested <- richness_swe %>%
nest(data = -Stream) #create new nested dataframe with one column for Stream, one column for data - data entries for each site include year and richness.
Next, run a model of richness ~ Targhee SWE for each site separately and return a summary table with the results for each site:
rswe_models <- rswe_nested %>%
mutate(model = map(data, ~lm(richness~targhee_swe, data = .)%>% #run a model of richness~year for each site (referenced by "."); store results in new column called "model"
tidy))%>% #tidy function to construct a summary table with model results (seperate tables for each site)
unnest(model) %>% #unnest model column to get rows for each site, columns for model metrics
filter(term == "targhee_swe") #remove intercept term to just get info on year for each site
rswe_models
## # A tibble: 13 × 7
## Stream data term estimate std.error statistic p.value
## <chr> <list> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 cloudveil <tibble [4 × 6]> targhee_swe -0.222 0.0799 -2.78 0.109
## 2 delta <tibble [6 × 6]> targhee_swe 0.0456 0.0584 0.780 0.479
## 3 grizzly <tibble [6 × 6]> targhee_swe -0.0296 0.0484 -0.612 0.574
## 4 gusher <tibble [3 × 6]> targhee_swe -0.102 0.242 -0.423 0.745
## 5 mid_teton <tibble [8 × 6]> targhee_swe -0.0350 0.0479 -0.730 0.493
## 6 n_teton <tibble [8 × 6]> targhee_swe -0.162 0.0730 -2.21 0.0689
## 7 paintbrush <tibble [6 × 6]> targhee_swe -0.0214 0.0411 -0.521 0.630
## 8 s_ak_basin <tibble [7 × 6]> targhee_swe 0.0315 0.0842 0.374 0.724
## 9 s_cascade <tibble [7 × 6]> targhee_swe -0.227 0.164 -1.39 0.225
## 10 s_teton <tibble [8 × 6]> targhee_swe -0.0859 0.0832 -1.03 0.342
## 11 silver_run <tibble [2 × 6]> targhee_swe -0.0750 NaN NaN NaN
## 12 skillet <tibble [6 × 6]> targhee_swe -0.0341 0.0705 -0.483 0.654
## 13 windcave <tibble [8 × 6]> targhee_swe -0.0138 0.0419 -0.330 0.753
No significant relationships between Max SWE at Targhee and richness
Phillips:
phillips.richness <- ggplot(filter(richness_swe, !Stream == "silver_run"), aes(x = phillips_swe, y = richness, color = Stream))+
geom_point()+
geom_smooth(se = F, linetype = "dashed", method = "lm")+
theme_classic()+
facet_wrap(~Stream)+
theme(legend.position = "none")+
labs(x = "Maximum SWE, Phillips Bench SNOTEL Station", y = "Taxonomic Richness")
phillips.richness
## `geom_smooth()` using formula 'y ~ x'
rswe_pb<- rswe_nested %>%
mutate(model = map(data, ~lm(richness~phillips_swe, data = .)%>% #run a model of richness~year for each site (referenced by "."); store results in new column called "model"
tidy))%>% #tidy function to construct a summary table with model results (seperate tables for each site)
unnest(model) %>% #unnest model column to get rows for each site, columns for model metrics
filter(term == "phillips_swe") #remove intercept term to just get info on year for each site
rswe_pb
## # A tibble: 13 × 7
## Stream data term estimate std.error statistic p.value
## <chr> <list> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 cloudveil <tibble [4 × 6]> phillips_s… -0.231 0.112 -2.06 0.176
## 2 delta <tibble [6 × 6]> phillips_s… 0.0336 0.0743 0.452 0.675
## 3 grizzly <tibble [6 × 6]> phillips_s… -0.00913 0.0614 -0.149 0.889
## 4 gusher <tibble [3 × 6]> phillips_s… -0.0541 0.259 -0.209 0.869
## 5 mid_teton <tibble [8 × 6]> phillips_s… -0.0619 0.0511 -1.21 0.271
## 6 n_teton <tibble [8 × 6]> phillips_s… -0.174 0.0869 -2.00 0.0924
## 7 paintbrush <tibble [6 × 6]> phillips_s… -0.0231 0.0458 -0.503 0.641
## 8 s_ak_basin <tibble [7 × 6]> phillips_s… 0.0274 0.0951 0.289 0.785
## 9 s_cascade <tibble [7 × 6]> phillips_s… -0.370 0.171 -2.17 0.0825
## 10 s_teton <tibble [8 × 6]> phillips_s… -0.113 0.0919 -1.23 0.264
## 11 silver_run <tibble [2 × 6]> phillips_s… -0.0816 NaN NaN NaN
## 12 skillet <tibble [6 × 6]> phillips_s… -0.0206 0.0876 -0.236 0.825
## 13 windcave <tibble [8 × 6]> phillips_s… -0.0203 0.0474 -0.429 0.683
Significant (-) relationship between SWE @ Phillips bench and richness at Cloudveil; close to significant at S cascade.